{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install openai python-dotenv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from openai import AzureOpenAI\n",
    "import numpy as np\n",
    "from dotenv import load_dotenv\n",
    "load_dotenv()\n",
    "\n",
    "client = AzureOpenAI(\n",
    "  api_key = os.getenv(\"AZURE_OPENAI_KEY\"),  \n",
    "  api_version = \"2023-05-15\",\n",
    "  azure_endpoint = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dependencies for embeddings_utils\n",
    "%pip install matplotlib plotly scikit-learn pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cosine_similarity(a, b):\n",
    "    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = 'the quick brown fox jumped over the lazy dog'\n",
    "model = 'text-embedding-ada-002'\n",
    "\n",
    "client.embeddings.create(input = [text], model=model).data[0].embedding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# compare several words\n",
    "automobile_embedding    = client.embeddings.create(input = 'automobile', model=model).data[0].embedding\n",
    "vehicle_embedding       = client.embeddings.create(input = 'vehicle', model=model).data[0].embedding\n",
    "dinosaur_embedding      = client.embeddings.create(input = 'dinosaur', model=model).data[0].embedding\n",
    "stick_embedding         = client.embeddings.create(input = 'stick', model=model).data[0].embedding\n",
    "\n",
    "# comparing cosine similarity, automobiles vs automobiles should be 1.0, i.e exactly the same, while automobiles vs dinosaurs should be between 0 and 1, i.e. not the same\n",
    "print(cosine_similarity(automobile_embedding, automobile_embedding))\n",
    "print(cosine_similarity(automobile_embedding, vehicle_embedding))\n",
    "print(cosine_similarity(automobile_embedding, dinosaur_embedding))\n",
    "print(cosine_similarity(automobile_embedding, stick_embedding))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
